### File: 1_meta_analysis
### Purpose: replicates in-text findings on how often the analyzed papers 
### reported IRR and labeler demographics, plus all results in Appendix A
### Created: 12/5/2024

#######################
## required packages
#######################
## library() stops immediately with an error when a package is missing;
## require() would only warn and return FALSE, so the script would fail
## later with a less informative message
library(tidyverse)
library(xtable)
library(irr)

#######################
### paths
#######################
## project root: the current working directory
main <- getwd()

## data folder; the trailing slash is kept so file names can be appended
data_dir <- str_c(main, "data/", sep = "/")

## results folder; trailing slash kept as above
results_dir <- str_c(main, "results/", sep = "/")

#####################
### Read in the data
#######################

## two data sets required:

## raw coding
master_data <- str_c(data_dir, "meta_analysis_master.csv") %>%
  read_csv()

## coding with the differences between coders reconciled
clean_meta_data <- str_c(data_dir, "clean_meta_data.csv") %>%
  read_csv()

#####################
### Appendix A, Table 1
#######################

#### IRR for annotating of articles in the meta analysis

## paper ids checked by each pair of coders: the coder names of every paper
## are collapsed into one string, and a paper is kept when that string
## matches the pair in either order

## John and Nora
jn_ids <- master_data %>%
  group_by(paper_id) %>%
  summarize(coders = str_c(coder, collapse = "; ")) %>%
  filter(coders %in% c("Nora; John", "John; Nora")) %>%
  pull(paper_id)

## John and Lucie
jl_ids <- master_data %>%
  group_by(paper_id) %>%
  summarize(coders = str_c(coder, collapse = "; ")) %>%
  filter(coders %in% c("Lucie; John", "John; Lucie")) %>%
  pull(paper_id)

## Lucie and Nora
ln_ids_temp <- master_data %>%
  group_by(paper_id) %>%
  summarize(coders = str_c(coder, collapse = "; ")) %>%
  filter(coders %in% c("Lucie; Nora", "Nora; Lucie")) %>%
  pull(paper_id)
## Empty -- these are the papers without the tie-breaks

##########
## John/Nora comparison on the `relevant` code

## Cohen's kappa layout: one row per paper, one column per rater
## (John vs. the other coder)
irr_data_rel_jn <- master_data %>%
  filter(paper_id %in% jn_ids) %>%
  mutate(john = if_else(coder == "John", "John", "Non-John")) %>%
  select(paper_id, john, relevant) %>%
  pivot_wider(id_cols = paper_id,
              names_from = john,
              values_from = relevant) %>%
  select(-paper_id)

rel_kappa_jn <- irr::kappa2(irr_data_rel_jn)
rel_kappa_jn

## Krippendorff's alpha layout: one row per rater, one column per paper
irr_data_rel_krip_jn <- master_data %>%
  filter(paper_id %in% jn_ids) %>%
  mutate(john = if_else(coder == "John", "John", "Non-John")) %>%
  select(john, paper_id, relevant) %>%
  pivot_wider(id_cols = john,
              names_from = paper_id,
              values_from = relevant) %>%
  select(-john) %>%
  as.matrix()

rel_krip_jn <- irr::kripp.alpha(irr_data_rel_krip_jn)
rel_krip_jn

### John/Nora IRR on the binary "yes" question:
### yes_clean is 1 when original_labeling is "Yes" and 0 otherwise

## Cohen's kappa layout: one row per paper, one column per rater
irr_data_yes_jn <- master_data %>%
  filter(paper_id %in% jn_ids) %>%
  mutate(yes_clean = as.numeric(original_labeling == "Yes"),
         john = if_else(coder == "John", "John", "Non-John")) %>%
  select(paper_id, john, yes_clean) %>%
  pivot_wider(id_cols = paper_id,
              names_from = john,
              values_from = yes_clean) %>%
  select(-paper_id)

yes_kappa_jn <- irr::kappa2(irr_data_yes_jn)
yes_kappa_jn

## Krippendorff's alpha layout: one row per rater, one column per paper
irr_data_yes_krip_jn <- master_data %>%
  filter(paper_id %in% jn_ids) %>%
  mutate(yes_clean = as.numeric(original_labeling == "Yes"),
         john = if_else(coder == "John", "John", "Non-John")) %>%
  select(john, paper_id, yes_clean) %>%
  pivot_wider(id_cols = john,
              names_from = paper_id,
              values_from = yes_clean) %>%
  select(-john) %>%
  as.matrix()

yes_krip_jn <- irr::kripp.alpha(irr_data_yes_krip_jn)
yes_krip_jn

##############
## John/Lucie comparison on the `relevant` code

## Cohen's kappa layout: one row per paper, one column per rater
## (John vs. the other coder)
irr_data_rel_jl <- master_data %>%
  filter(paper_id %in% jl_ids) %>%
  mutate(john = if_else(coder == "John", "John", "Non-John")) %>%
  select(paper_id, john, relevant) %>%
  pivot_wider(id_cols = paper_id,
              names_from = john,
              values_from = relevant) %>%
  select(-paper_id)

rel_kappa_jl <- irr::kappa2(irr_data_rel_jl)
rel_kappa_jl

## Krippendorff's alpha layout: one row per rater, one column per paper
irr_data_rel_krip_jl <- master_data %>%
  filter(paper_id %in% jl_ids) %>%
  mutate(john = if_else(coder == "John", "John", "Non-John")) %>%
  select(john, paper_id, relevant) %>%
  pivot_wider(id_cols = john,
              names_from = paper_id,
              values_from = relevant) %>%
  select(-john) %>%
  as.matrix()

rel_krip_jl <- irr::kripp.alpha(irr_data_rel_krip_jl)
rel_krip_jl

### John/Lucie IRR on the binary "yes" question:
### yes_clean is 1 when original_labeling is "Yes" and 0 otherwise

## Cohen's kappa layout: one row per paper, one column per rater
irr_data_yes_jl <- master_data %>%
  filter(paper_id %in% jl_ids) %>%
  mutate(yes_clean = as.numeric(original_labeling == "Yes"),
         john = if_else(coder == "John", "John", "Non-John")) %>%
  select(paper_id, john, yes_clean) %>%
  pivot_wider(id_cols = paper_id,
              names_from = john,
              values_from = yes_clean) %>%
  select(-paper_id)

yes_kappa_jl <- irr::kappa2(irr_data_yes_jl)
yes_kappa_jl

## Krippendorff's alpha layout: one row per rater, one column per paper
irr_data_yes_krip_jl <- master_data %>%
  filter(paper_id %in% jl_ids) %>%
  mutate(yes_clean = as.numeric(original_labeling == "Yes"),
         john = if_else(coder == "John", "John", "Non-John")) %>%
  select(john, paper_id, yes_clean) %>%
  pivot_wider(id_cols = john,
              names_from = paper_id,
              values_from = yes_clean) %>%
  select(-john) %>%
  as.matrix()

yes_krip_jl <- irr::kripp.alpha(irr_data_yes_krip_jl)
yes_krip_jl

### assemble Appendix A, Table 1: one row per relevance measure and coder pair
### ("Complex" rows use the `relevant` statistics, "Simple" rows the yes/no
### ones; pair "1-2" is John/Nora and "1-3" is John/Lucie)
irr_results <- tibble(
  `Relevance measure` = rep(c("Complex", "Simple"), each = 2),
  `Coder pair` = rep(c("1-2", "1-3"), times = 2),
  `Cohen's kappa` = c(rel_kappa_jn$value, rel_kappa_jl$value,
                      yes_kappa_jn$value, yes_kappa_jl$value),
  `Krippendorff's alpha` = c(rel_krip_jn$value, rel_krip_jl$value,
                             yes_krip_jn$value, yes_krip_jl$value)
)

## print as LaTeX, caption above the table, no row names
print(
  xtable(irr_results,
         caption = "IRR of Relevant Articles from Meta-Analysis",
         label = "table:meta_irr"),
  include.rownames = FALSE,
  caption.placement = "top"
)


#####################
### Appendix A, Table 2
#######################

### IRR on annotating for IRR and demographic information on coders

## keep only the relevant articles, ordered by paper id; has_demo is 1 when
## demo_labelers is filled in and 0 when it is missing
relevant_demo <- clean_meta_data %>%
  filter(relevant == 1) %>%
  arrange(paper_id) %>%
  mutate(has_demo = as.numeric(!is.na(demo_labelers)))


## one more set of comparison article ids: papers coded by Lucie and Nora
ln_ids <- relevant_demo %>%
  group_by(paper_id) %>%
  summarize(coders = str_c(coder, collapse = "; ")) %>%
  filter(coders %in% c("Lucie; Nora", "Nora; Lucie")) %>%
  pull(paper_id)


########### our IRR on whether papers had IRR
## has_irr is 0 when first_var_irr_stat is missing or is one of the
## free-text "nothing reported" entries listed below, and 1 otherwise

relevant_irr <- clean_meta_data %>%
  filter(relevant == 1) %>%
  arrange(paper_id) %>%
  mutate(
    has_irr = if_else(
      is.na(first_var_irr_stat) |
        first_var_irr_stat %in% c(
          "none",
          "dont really report IRR. .",
          "only one coder",
          "not reported/ another article is referenced"
        ),
      0,
      1
    )
  )

## John/Lucie comparison on has_irr
## Cohen's kappa layout: one row per paper, one column per rater
irr_data_irr_jl <- relevant_irr %>%
  filter(paper_id %in% jl_ids) %>%
  mutate(john = if_else(coder == "John", "John", "Non-John")) %>%
  select(paper_id, john, has_irr) %>%
  pivot_wider(id_cols = paper_id,
              names_from = john,
              values_from = has_irr) %>%
  select(-paper_id)

irr_kappa_jl <- irr::kappa2(irr_data_irr_jl)
irr_kappa_jl # around 0.67

## Krippendorff's alpha layout: one row per rater, one column per paper
irr_data_irr_krip_jl <- relevant_irr %>%
  filter(paper_id %in% jl_ids) %>%
  mutate(john = if_else(coder == "John", "John", "Non-John")) %>%
  select(john, paper_id, has_irr) %>%
  pivot_wider(id_cols = john,
              names_from = paper_id,
              values_from = has_irr) %>%
  select(-john) %>%
  as.matrix()

irr_krip_jl <- irr::kripp.alpha(irr_data_irr_krip_jl)
irr_krip_jl #0.681

## John/Nora comparison on has_irr
## rows coded by Lucie are excluded for these papers
## Cohen's kappa layout: one row per paper, one column per rater;
## papers lacking a value for either rater are dropped
irr_data_irr_jn <- relevant_irr %>%
  filter(paper_id %in% jn_ids, coder != "Lucie") %>%
  mutate(john = if_else(coder == "John", "John", "Non-John")) %>%
  select(paper_id, john, has_irr) %>%
  pivot_wider(id_cols = paper_id,
              names_from = john,
              values_from = has_irr) %>%
  select(-paper_id) %>%
  na.omit()

irr_kappa_jn <- irr::kappa2(irr_data_irr_jn)
irr_kappa_jn #.76

## Krippendorff's alpha layout: one row per rater, one column per paper
irr_data_irr_krip_jn <- relevant_irr %>%
  filter(paper_id %in% jn_ids, coder != "Lucie") %>%
  mutate(john = if_else(coder == "John", "John", "Non-John")) %>%
  select(john, paper_id, has_irr) %>%
  pivot_wider(id_cols = john,
              names_from = paper_id,
              values_from = has_irr) %>%
  select(-john) %>%
  as.matrix()

irr_krip_jn <- irr::kripp.alpha(irr_data_irr_krip_jn)
irr_krip_jn #0.77

## Lucie/Nora comparison -- from tie-breaks
## ids of the relevant papers whose coders are exactly Lucie and Nora
ln_ids <- relevant_irr %>%
  group_by(paper_id) %>%
  summarize(coders = str_c(coder, collapse = "; ")) %>%
  filter(coders %in% c("Lucie; Nora", "Nora; Lucie")) %>%
  pull(paper_id)

## Cohen's kappa layout: one row per paper, one column per rater
## (Lucie vs. the other coder); papers lacking a value for either rater
## are dropped
irr_data_irr_ln <- relevant_irr %>%
  filter(paper_id %in% ln_ids) %>%
  mutate(lucie = if_else(coder == "Lucie", "Lucie", "Non-Lucie")) %>%
  select(paper_id, lucie, has_irr) %>%
  pivot_wider(id_cols = paper_id,
              names_from = lucie,
              values_from = has_irr) %>%
  select(-paper_id) %>%
  na.omit()

irr_kappa_ln <- irr::kappa2(irr_data_irr_ln)
irr_kappa_ln #.33

## Krippendorff's alpha layout: one row per rater, one column per paper
irr_data_irr_krip_ln <- relevant_irr %>%
  filter(paper_id %in% ln_ids) %>%
  mutate(lucie = if_else(coder == "Lucie", "Lucie", "Non-Lucie")) %>%
  select(lucie, paper_id, has_irr) %>%
  pivot_wider(id_cols = lucie,
              names_from = paper_id,
              values_from = has_irr) %>%
  select(-lucie) %>%
  as.matrix()

irr_krip_ln <- irr::kripp.alpha(irr_data_irr_krip_ln)
irr_krip_ln #0.375


################## On the demographic data

## John/Lucie comparison on has_demo
## Cohen's kappa layout: one row per paper, one column per rater
demo_data_irr_jl <- relevant_demo %>%
  filter(paper_id %in% jl_ids) %>%
  mutate(john = if_else(coder == "John", "John", "Non-John")) %>%
  select(paper_id, john, has_demo) %>%
  pivot_wider(id_cols = paper_id,
              names_from = john,
              values_from = has_demo) %>%
  select(-paper_id)

demo_kappa_jl <- irr::kappa2(demo_data_irr_jl)
demo_kappa_jl # around 0.67

## Krippendorff's alpha layout: one row per rater, one column per paper
demo_data_irr_krip_jl <- relevant_demo %>%
  filter(paper_id %in% jl_ids) %>%
  mutate(john = if_else(coder == "John", "John", "Non-John")) %>%
  select(john, paper_id, has_demo) %>%
  pivot_wider(id_cols = john,
              names_from = paper_id,
              values_from = has_demo) %>%
  select(-john) %>%
  as.matrix()

demo_krip_jl <- irr::kripp.alpha(demo_data_irr_krip_jl)
demo_krip_jl #0.671

## John/Nora comparison on has_demo
## rows coded by Lucie are excluded for these papers
## Cohen's kappa layout: one row per paper, one column per rater;
## papers lacking a value for either rater are dropped
demo_data_irr_jn <- relevant_demo %>%
  filter(paper_id %in% jn_ids, coder != "Lucie") %>%
  mutate(john = if_else(coder == "John", "John", "Non-John")) %>%
  select(paper_id, john, has_demo) %>%
  pivot_wider(id_cols = paper_id,
              names_from = john,
              values_from = has_demo) %>%
  select(-paper_id) %>%
  na.omit()

demo_kappa_jn <- irr::kappa2(demo_data_irr_jn)
demo_kappa_jn #.5

## Krippendorff's alpha layout: one row per rater, one column per paper
demo_data_irr_krip_jn <- relevant_demo %>%
  filter(paper_id %in% jn_ids, coder != "Lucie") %>%
  mutate(john = if_else(coder == "John", "John", "Non-John")) %>%
  select(john, paper_id, has_demo) %>%
  pivot_wider(id_cols = john,
              names_from = paper_id,
              values_from = has_demo) %>%
  select(-john) %>%
  as.matrix()

demo_krip_jn <- irr::kripp.alpha(demo_data_irr_krip_jn)
demo_krip_jn #0.5


## Lucie/Nora comparison on has_demo -- from tie-breaks
## Cohen's kappa layout: one row per paper, one column per rater
## (Lucie vs. the other coder); papers lacking a value for either rater
## are dropped
demo_data_irr_ln <- relevant_demo %>%
  filter(paper_id %in% ln_ids) %>%
  mutate(lucie = if_else(coder == "Lucie", "Lucie", "Non-Lucie")) %>%
  select(paper_id, lucie, has_demo) %>%
  pivot_wider(id_cols = paper_id,
              names_from = lucie,
              values_from = has_demo) %>%
  select(-paper_id) %>%
  na.omit()

demo_kappa_ln <- irr::kappa2(demo_data_irr_ln)
demo_kappa_ln #.33

## Krippendorff's alpha layout: one row per rater, one column per paper
demo_data_irr_krip_ln <- relevant_demo %>%
  filter(paper_id %in% ln_ids) %>%
  mutate(lucie = if_else(coder == "Lucie", "Lucie", "Non-Lucie")) %>%
  select(lucie, paper_id, has_demo) %>%
  pivot_wider(id_cols = lucie,
              names_from = paper_id,
              values_from = has_demo) %>%
  select(-lucie) %>%
  as.matrix()

demo_krip_ln <- irr::kripp.alpha(demo_data_irr_krip_ln)
demo_krip_ln #0.375


### assemble Appendix A, Table 2: one row per measure and coder pair
### (pair "1-2" uses the John/Nora statistics, "1-3" the John/Lucie ones;
### the Lucie/Nora tie-break statistics are not part of this table)
irr_results_irr_demo <- tibble("Measure" = c("Reports IRR", "Reports IRR",
                                             "Reports Demographics", "Reports Demographics"),
                               "Coder pair" = c("1-2", "1-3",
                                                "1-2", "1-3"),
                               "Cohen's kappa" = c(irr_kappa_jn$value, irr_kappa_jl$value,
                                                   demo_kappa_jn$value, demo_kappa_jl$value),
                               "Krippendorff's alpha" = c(irr_krip_jn$value, irr_krip_jl$value, 
                                                          demo_krip_jn$value, demo_krip_jl$value))


## print as LaTeX, caption above the table, no row names
## (caption typo "Demographis" corrected to "Demographics")
print(xtable(irr_results_irr_demo, label = "table:irr_for_irr_demo",
             caption = "IRR of Reporting IRR and Demographics in Meta-Analysis Articles"), 
      caption.placement = "top",
      include.rownames = FALSE)


#####################
### Appendix A, Table 3
#######################

## one row per relevant paper: the demo_labelers entries of all its rows are
## joined with "; ", with missing entries written out as the text "NA"
collapse_intermed <- clean_meta_data %>%
  filter(relevant == 1) %>%
  arrange(paper_id) %>%
  group_by(paper_id) %>%
  summarize(
    demo_labelers = str_c(str_replace_na(demo_labelers), collapse = "; "),
    .groups = "drop"
  )

########
## demographics mentions

## Flag, per paper, whether any coder recorded labeler demographics and which
## characteristics are mentioned in the collapsed demo_labelers string.
## has_demo is 0 when the string consists solely of "NA" entries joined by
## "; " -- for any number of coder rows, not just one or two, so a paper with
## three all-missing rows is not miscounted as reporting demographics.
demo_clean <- collapse_intermed %>% 
  select(demo_labelers) %>% 
  mutate(has_demo = if_else(str_detect(demo_labelers, "^NA(; NA)*$"), 0, 1),
         demo_educ = if_else(str_detect(demo_labelers, "Education"), 1, 0),
         demo_partisan = if_else(str_detect(demo_labelers, "Party"), 1, 0),
         ## language is matched through any of three keywords
         demo_language = if_else(str_detect(demo_labelers, "Language") |
                                   str_detect(demo_labelers, "speakers") |
                                   str_detect(demo_labelers, "readers") , 1, 0),
         demo_national = if_else(str_detect(demo_labelers, "National"), 1, 0),
         demo_race = if_else(str_detect(demo_labelers, "Race"), 1, 0),
         demo_gender = if_else(str_detect(demo_labelers, "Gender"), 1, 0))

### share (proportion) of papers that mention demographics of coders
demo_pct <- sum(demo_clean$has_demo) / nrow(demo_clean)

demo_pct # 0.247

## number of papers that mention demographics of coders
demo_count <- sum(demo_clean$has_demo)

demo_count #24

## the remaining shares are all relative to the papers that DO mention
## demographics (denominator: sum of has_demo)

## education status
educ_pct_demo <- with(demo_clean, sum(demo_educ) / sum(has_demo))
educ_pct_demo # 0.625

## party ID
sum(demo_clean$demo_partisan) # just 1, i.e. a share of 0.042 (about 4.2%)
party_pct_demo <- with(demo_clean, sum(demo_partisan) / sum(has_demo))
party_pct_demo

## language
lang_pct_demo <- with(demo_clean, sum(demo_language) / sum(has_demo))
lang_pct_demo

## race
race_pct_demo <- with(demo_clean, sum(demo_race) / sum(has_demo))
race_pct_demo

## gender
gender_pct_demo <- with(demo_clean, sum(demo_gender) / sum(has_demo))
gender_pct_demo

## nationality
nation_pct_demo <- with(demo_clean, sum(demo_national) / sum(has_demo))
nation_pct_demo

## assemble Appendix A, Table 3: one row per characteristic
## (the values are proportions among papers reporting any demographics)
demo_results <- tibble(
  `Characteristic` = c("Education", "Language", "Race",
                       "Nation", "Gender", "Partisanship"),
  `Percent of Papers` = c(educ_pct_demo, lang_pct_demo, race_pct_demo,
                          nation_pct_demo, gender_pct_demo, party_pct_demo)
)


## print as LaTeX, caption above the table, no row names
print(
  xtable(
    demo_results,
    caption = "Percent of Characteristics Mentioned in Articles with Demographic Information",
    label = "table:demo_results"
  ),
  include.rownames = FALSE,
  caption.placement = "top"
)

